################################################################################################
####### Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
####### Chapter 5: Replication for analysis at the municipal level part 2 
####### R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
####### Platform: x86_64-apple-darwin15.6.0 (64-bit)
############################################################################

rm(list = ls())
library(ggplot2)
library(stargazer)
library(openxlsx)
library(lpdensity)
library(rddensity)
library(rdrobust)
library(rdlocrand)
library(plyr)
library(stringr)
library(xtable)
library(rdd)
library(texreg)

#######################################
###### ANALYSIS OF RELIGIOSITY  #######
#######################################

gminy1<-read.xlsx("chapter5/distances_gm2015.xlsx") #gmina coordinates with distances: 

relig<-read.xlsx("chapter5/Gminy_Prakt_91Charnysz.xlsx", sheet="ZZ_PRUS") #Data for Ziemie zachodnie and Prussian partition only

which(!relig$Kod_Teryt %in% gminy1$jpt_kod_je2)

##Correct some codes for proper merging: 
relig$numer<-as.numeric(as.character(substr(relig$Kod_Teryt,1,nchar(relig$Kod_Teryt)-1)))
gminy1$numer<-as.numeric(as.character(substr(gminy1$jpt_kod_je2, 1, nchar(gminy1$jpt_kod_je2)-1)))
which(!relig$numer %in% gminy1$numer)
relig[which(!relig$numer %in% gminy1$numer), c(6:8, 17)]
relig$numer[relig$NAZWA=="Ślesin"]<-gminy1$numer[gminy1$jpt_nazwa_=="Ślesin"]
relig$numer[relig$NAZWA=="Olszyna"]<-gminy1$numer[gminy1$jpt_nazwa_=="Olszyna"]
relig$numer[relig$NAZWA=="Prószków"]<-gminy1$numer[gminy1$jpt_nazwa_=="Prószków"]
relig$numer[relig$NAZWA=="Wałbrzych"]<-gminy1$numer[gminy1$jpt_nazwa_=="Wałbrzych"]
relig$numer[relig$NAZWA=="Gościno"]<-gminy1$numer[gminy1$jpt_nazwa_=="Gościno"]
relig$numer[relig$NAZWA=="Stepnica"]<-gminy1$numer[gminy1$jpt_nazwa_=="Stepnica"]
relig$numer[relig$NAZWA=="Tychowo"]<-gminy1$numer[gminy1$jpt_nazwa_=="Tychowo"]
relig$numer[relig$NAZWA=="Irządze"]<-gminy1$numer[gminy1$jpt_nazwa_=="Irządze"]
relig$numer[relig$NAZWA=="Kalety"]<-gminy1$numer[gminy1$jpt_nazwa_=="Kalety"]
relig$numer[relig$NAZWA=="Poraj"]<-gminy1$numer[gminy1$jpt_nazwa_=="Poraj"]
relig$numer[relig$NAZWA=="Popów"]<-gminy1$numer[gminy1$jpt_nazwa_=="Popów"]
relig$numer[relig$NAZWA=="Kamienica Polska"]<-gminy1$numer[gminy1$jpt_nazwa_=="Kamienica Polska"]
relig$numer[relig$NAZWA=="Janów"]<-240403
relig$numer[relig$NAZWA=="Prószków"]<-gminy1$numer[gminy1$jpt_nazwa_=="Prószków"]
relig$numer[relig$NAZWA=="Czarna Woda"]<-gminy1$numer[gminy1$jpt_nazwa_=="Czarna Woda"]
relig$numer[relig$NAZWA=="Siechnice"]<-gminy1$numer[gminy1$jpt_nazwa_=="Siechnice"]
relig$numer[relig$NAZWA=="Dziwnów"]<-gminy1$numer[gminy1$jpt_nazwa_=="Dziwnów"]
relig$numer[relig$NAZWA=="Zielona Góra"]<-gminy1$numer[gminy1$jpt_nazwa_=="Zielona Góra"]

relig[which(!relig$numer %in% gminy1$numer), c(6:8, 17)]
relig1<-merge(relig, gminy1, by="numer", all.x=TRUE)

relig1$Type<-ifelse(relig1$NAZDOD %in% c("gmina miejska","miasto", "delegatura"), 1, 0) #original

relig1$distanceB<-ifelse(relig1$Part=="ZZ", relig1$distance, -relig1$distance)
relig1$lon<-as.numeric(as.character(relig1$lon))
relig1$lat<-as.numeric(as.character(relig1$lat))

relig1S<-subset(relig1, Part=="ZZ" | Part=="PRUS")
X<-relig1S$distanceB/1000
PctMass<-100*(relig1S$mszaMK_1991/relig1S$lw_1991)
PctMass[which(relig1S$mszaMK_1991/relig1S$lw_1991>1)]<-NA
PctKom<-100*(relig1S$komMK_1991/relig1S$lz_1991)

Z<-cbind(relig1S$Type, relig1S$lon, relig1S$lat) 



########## Table A9. Mass attendance: 
out1 = rdrobust(PctMass, X,  kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
#summary(out1)

out2 = rdrobust(PctMass, X, covs=Z, kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
#summary(out2)

out3 = rdrobust(PctKom, X,  kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
#summary(out3)

out4 = rdrobust(PctKom, X, covs=Z, kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) 
#summary(out4)

Coefficient<-c(round(out1$coef[1,],2), round(out2$coef[1,],2), round(out3$coef[1,],2), round(out4$coef[1,],2))
SE<-c(round(out1$se[1,],2), round(out2$se[1,],2), round(out3$se[1,],2), round(out4$se[1,],2))
CIs<-c(paste(round(out1$ci[3,1],2), round(out1$ci[3,2],2), sep=", "), paste(round(out2$ci[3,1],2), round(out2$ci[3,2],2), sep=", "), paste(round(out3$ci[3,1],2), round(out3$ci[3,2],2), sep=", "), paste(round(out4$ci[3,1],2), round(out4$ci[3,2],2), sep=", "))
Observations<-c(sum(out1$N), sum(out2$N), sum(out3$N), sum(out4$N))

Kernel<-c(out1$k, out2$k, out3$k, out4$k)
Polynomial<-c(out1$p,out2$p,out3$p,out4$p)
BandwidthType<-c(out1$bwselect,out2$bwselect, out3$bwselect, out4$bwselect)
BandwidthKM<-c(round(out1$bws[1,1], 2), round(out2$bws[1,1], 2), round(out3$bws[1,1],2), round(out4$bws[1,1],2))
EffectiveNUntreated<-c(out1$N_h[1],out2$N_h[1], out3$N_h[1], out4$N_h[1])
EffectiveNTreated<-c(out1$N_h[2],out2$N_h[2], out3$N_h[2], out4$N_h[2]) #syntax varies by version. Alternative is out1$Nh 


table_religion<-as.data.frame(rbind(Coefficient, SE, CIs, Observations, Kernel, Polynomial, BandwidthType, BandwidthKM, EffectiveNTreated, EffectiveNUntreated))
colnames(table_religion)<-c("% Catholics attending mass", "% Catholics attending mass", "% Catholics receiving communion", "% Catholics receiving communion")
rownames(table_religion)<-c("Coefficient", "Standard error (conventional)", "Robust bias-corrected CIs", "Observations", "Kernel type", "Polynomial", "Bandwidth type", "MSE-optimal bandwidth", "Effective # treated", "Effective # untreated")

xtable(table_religion,label = 'tab:religion', caption = 'Regression Discontinuity Analysis: Catholic Church')

#####################################################
########## Loading data for Table A.10: 
#####################################################
dat<-read.xlsx("chapter5/voting1990s_rd.xlsx")

dat$distanceB<-ifelse(dat$Part=="ZZ", dat$distance, -dat$distance)

Subset<-subset(dat, (dat$Part=="ZZ"  | dat$Part=="PRUS") & dat$Woj49!="Bielsko-Biala" & dat$Woj49!="Katowice" & dat$Woj49!="Suwalki"  & dat$Woj49!="Warszawskie" & dat$Woj49!="Opole"   & dat$Woj49!="Czestochowskie")  
table(Subset$Woj49) 
dim(Subset) 

Subset$lat<-as.numeric(as.character(Subset$lat))
Subset$lon<-as.numeric(as.character(Subset$lon))
Subset$distrailM <-as.numeric(as.character(Subset$distrailM))

Subset$lotlanInt<-Subset$lat*Subset$lon
Subset$rural<-ifelse(Subset$TypeGmina=="2", 1, 0)
Subset$town<-ifelse(Subset$TypeGmina=="1", 1, 0)

X<-Subset$distanceB/1000

Z<-cbind(Subset$distrailM/1000, Subset$town, Subset$lon, Subset$lat, Subset$lotlanInt) 


######  Presidential election 1990 

WalesaR190 <-Subset$WalesaR190*100
WalesaR190 = rdrobust(WalesaR190, X, covs=Z, kernel = "triangular", p = 1, vce="hc1", bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(WalesaR190)

WalesaR290 <-Subset$WalesaR290*100
WalesaR290 = rdrobust(WalesaR290, X, covs=Z, kernel = "triangular", p = 1,vce="hc1", bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(WalesaR290)

######  Parliamentary election 1991 

PctSLD91 <-Subset$PctSLD91*100 
voteSLD91 = rdrobust(PctSLD91, X, covs=Z, kernel = "triangular", p = 1,  vce="hc1",bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(voteSLD91) 


#Solidarity vote
PctNSZZBlock91 <-Subset$PctNSZZBlock91*100 
PctNSZZBlock91 = rdrobust(PctNSZZBlock91, X, covs=Z, kernel = "triangular", p = 1,  vce="hc1", bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(PctNSZZBlock91)

PctSLD93 <-Subset$PctSLD93*100 
voteSLD93 = rdrobust(PctSLD93, X, covs=Z, kernel = "triangular", p = 1, vce="hc1", bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(voteSLD93) 


PctUD93 <-Subset$PctUD93*100
voteUD93 = rdrobust(PctUD93, X, covs=Z, kernel = "triangular", p = 1, vce="hc1", bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(voteUD93) 

###########################################################
#### Table A10: Election results in 1990, 1991 & 1993 ##### 
###########################################################

Coefficient<-c(round(WalesaR190$coef[1,],2), round(WalesaR290$coef[1,],2), round(voteSLD91$coef[1,],2), round(PctNSZZBlock91$coef[1,],2), round(voteSLD93$coef[1,],2), round(voteUD93$coef[1,],2))
SE<-c(round(WalesaR190$se[1,],2), round(WalesaR290$se[1,],2), round(voteSLD91$se[1,],2), round(PctNSZZBlock91$se[1,],2), round(voteSLD93$se[1,],2), round(voteUD93$se[1,],2))
CIs<-c(paste(round(WalesaR190$ci[3,1],2), round(WalesaR190$ci[3,2],2), sep=", "), paste(round(WalesaR290$ci[3,1],2), round(WalesaR290$ci[3,2],2), sep=", "), paste(round(voteSLD91$ci[3,1],2), round(voteSLD91$ci[3,2],2), sep=", "), paste(round(PctNSZZBlock91$ci[3,1],2), round(PctNSZZBlock91$ci[3,2],2), sep=", "), paste(round(voteSLD93$ci[3,1],2), round(voteSLD93$ci[3,2],2), sep=", "), paste(round(voteUD93$ci[3,1],2), round(voteUD93$ci[3,2],2),sep=", "))
Observations<-c(sum(WalesaR190$N), sum(WalesaR290$N), sum(voteSLD91$N), sum(PctNSZZBlock91$N), sum(voteSLD93$N), sum(voteUD93$N))

Kernel<-c(WalesaR190$k, WalesaR290$k, voteSLD91$k, PctNSZZBlock91$k, voteSLD93$k, voteUD93$k)
Polynomial<-c(WalesaR190$p,WalesaR290$p,voteSLD91$p,PctNSZZBlock91$p, voteSLD93$p, voteUD93$p)
BandwidthType<-c(WalesaR190$bwselect,WalesaR290$bwselect, voteSLD91$bwselect, PctNSZZBlock91$bwselect, voteSLD93$bwselect, voteUD93$bwselect)
BandwidthKM<-c(round(WalesaR190$bws[1,1], 2), round(WalesaR290$bws[1,1], 2), round(voteSLD91$bws[1,1],2), round(PctNSZZBlock91$bws[1,1],2), round(voteSLD93$bws[1,1],2), round(voteUD93$bws[1,1],2))
EffectiveNUntreated<-c(WalesaR190$N_h[1],WalesaR290$N_h[1], voteSLD91$N_h[1], PctNSZZBlock91$N_h[1], voteSLD93$N_h[1], voteUD93$N_h[1]) 

#Note for some versions of RDrobust, code is EffectiveNUntreated<-c(out1$N_h[1], out2$N_h[1], out3$N_h[1], out4$N_h[1], out5$N_h[1], out6$N_h[1], out7$N_h[1])

EffectiveNTreated<-c(WalesaR190$N_h[2],WalesaR290$N_h[2], voteSLD91$N_h[2], PctNSZZBlock91$N_h[2], voteSLD93$N_h[2], voteUD93$N_h[2])

#Note for some versions of RDrobust, code is: EffectiveNTreated<-c(out1$N_h[2], out2$N_h[2], out3$N_h[2], out4$N_h[2], out5$N_h[2], out6$N_h[2], out7$N_h[2])

elections_rd<-as.data.frame(rbind(Coefficient, SE, CIs, Observations, Kernel, Polynomial, BandwidthType, BandwidthKM, EffectiveNTreated, EffectiveNUntreated))
colnames(elections_rd)<-c("Walesa R1","Walesa R2", "SLD 1991", "Solidarity 1991", "SLD 1993", "UD 1993")
rownames(elections_rd)<-c("Coefficient", "Standard error (conventional)", "Robust bias-corrected CIs", "Observations", "Kernel type", "Polynomial", "Bandwidth type", "MSE-optimal bandwidth", "Effective # treated", "Effective # untreated")

xtable(elections_rd,label = 'tab:rd.elections', caption = 'Regression Discontinuity Analysis: Post-Communist Elections')

###################################################################
##### Creating Figure 5.5 + running alternative specifications ####
###################################################################

rdmod2 <- RDestimate(PctSLD91 ~ X | Z,  kernel = "triangular", cutpoint=0, bw=30)
summary(rdmod2)

fig5.5a <- function(){
  plot(rdmod2, range = c(-20, 20))
  title(main = "Support for communist-successor parties in 1991", xlab = "Non-resettled <-- (distance to the 1919 border, km) --> Resettled", ylab="SLD vote in 1991")
  abline(v = 0, col = "red")
}

fig5.5a()

rdmod3<- RDestimate(PctSLD93 ~ X | Z,  kernel = "triangular", cutpoint=0, bw=28)
summary(rdmod3)

fig5.5b <- function(){
  plot(rdmod3, range = c(-20, 20))
  title(main = "Support for communist-successor parties in 1993", xlab = "Non-resettled <-- (distance to the 1919 border, km) --> Resettled", ylab="SLD vote in 1993")
  abline(v = 0, col = "red")
}

fig5.5b()

####################################################################################
########## Figure 5.1 Checking for discontinuity in migration at the border
###################################################################################
ShareMig88<-Subset$FromBirthNo1988/Subset$TotPop1988 

### Manipulation check: Share migrants in 1988
out = rdrobust(ShareMig88, X, covs=Z, kernel = "triangular", p = 1, bwselect = "mserd", scaleregul = 1, all=TRUE) #
summary(out)# Negative and sig: migration treatment works. 
bandwidth = rdrobust(ShareMig88, X, covs=Z, kernel = "triangular", p = 1, bwselect = "mserd")$bws[1, 1]
out = rdplot(ShareMig88[abs(X) <= bandwidth], X[abs(X) <= bandwidth],p = 1, kernel = "triangular",  binselect = "qsmv", x.label="Non-resettled <-- (distance to the 1919 border, km) --> Resettled", y.label="Share living from birth in 1988", title="")
summary(out)

rdmod4<- RDestimate(ShareMig88 ~ X | Z,  kernel = "triangular", cutpoint=0, bw=25)
summary(rdmod4)

##Figure 5.1
fig5.1 <- function(){
  plot(rdmod4, range = c(-20, 20))
  title(main = "Population status based on the 1988 census", xlab = "Non-resettled <-- (distance to the 1919 border, km) --> Resettled", ylab="Share living from birth")
  abline(v = 0, col = "red")
}

fig5.1()


########################################################################
################## Table 5.4: Voting for ex-Communists  ################
################## OLS WITHIN the resettled region #####################
########################################################################

dat2<-read.xlsx("chapter5/WithinAnalysis_Municipalities.xlsx")

cse = function(reg) {
  rob = sqrt(diag(vcovHC(reg, type = "HC1")))
  return(rob)
}
hc1<-function(x) vcovHC(x, type="HC1")  

################################################################################################
################################################################################################

dat2$Herf<-1-(dat2$PctUSSR^2+dat2$PctCP^2+dat2$PctOther^2+dat2$PctAut^2)
dat2$City<-ifelse(dat2$PctUrb1948>0.90, 1,0)
dat2$shareLargest<-rep(NA, nrow(dat2))
for (i in 1:nrow(dat2)){
  dat2$shareLargest[i]<-max(dat2$PctUSSR[i], dat2$PctCP[i], dat2$PctOther[i], dat2$PctAut[i])
}

### Factor for which group is largest
dat2$whichLargest<-rep(NA, nrow(dat2))
dat2$whichLargest[dat2$shareLargest==dat2$PctUSSR]<-"USSR"
dat2$whichLargest[dat2$shareLargest==dat2$PctCP]<-"CP"
dat2$whichLargest[dat2$shareLargest==dat2$PctOther]<-"Other"
dat2$whichLargest[dat2$shareLargest==dat2$PctAut]<-"Aut"
dat2$whichLargest<-relevel(as.factor(dat2$whichLargest), ref="Other")

dat2$ShareVoluntary<-dat2$PctCP+dat2$PctOther
dat2$TotPop48Logged<-log(dat2$TotPop48)

reg1<-lm(PctSLD91 ~Herf, data=dat2)

reg2<-lm(PctSLD91 ~Herf+PctUrb1948+ SharePrzemysl1939 + ShareOver100 + DistLandBorder + DistPow1950+distrail48+log(TotPop48)+ GermanProvince, data=dat2)

reg3<-lm(PctSLD93 ~Herf, data=dat2)

reg4<-lm(PctSLD93 ~Herf+PctUrb1948+ SharePrzemysl1939 + ShareOver100 + DistLandBorder +DistPow1950+ distrail48+ log(TotPop48) + GermanProvince, data=dat2)


Table5.4 <- texreg(
  l = list(reg1, reg2, reg3, reg4),
  override.se=list(coeftest(reg1, vcov.=hc1)[,2], coeftest(reg2, vcov.=hc1)[,2], coeftest(reg3, vcov.=hc1)[,2], coeftest(reg4, vcov.=hc1)[,2]),
  override.pval=list(coeftest(reg1, vcov.=hc1)[,4], coeftest(reg2, vcov.=hc1)[,4], coeftest(reg3, vcov.=hc1)[,4], coeftest(reg4, vcov.=hc1)[,4]),     
  custom.model.names = c("SLD vote in 1991","SLD vote in 1991","SLD vote in 1993","SLD vote in 1993"),
  custom.coef.names=c("Fractionalization index",  "Share urban", "Share in industry (1939)", "Share over 100 ha (1939)", "Distance to German border", "Distance to railway",  "Log(Population)"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:electionsWithin",
  omit.coef = "(Intercept)|GermanProvince|DistPow1950", 
  include.rsquared = FALSE
)

Table5.4




